{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 統計的仮説検定"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:45.085823Z",
     "start_time": "2018-08-24T07:05:44.414922Z"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy import stats\n",
    "\n",
    "%precision 3\n",
    "np.random.seed(1111)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:45.104555Z",
     "start_time": "2018-08-24T07:05:45.087110Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([122.02, 131.73, 130.6 , 131.82, 132.05, 126.12, 124.43, 132.89,\n",
       "       122.79, 129.95, 126.14, 134.45, 127.64, 125.68])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('../data/ch11_potato.csv')\n",
    "sample = np.array(df['重さ'])\n",
    "sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:45.108071Z",
     "start_time": "2018-08-24T07:05:45.105521Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "128.451"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s_mean = np.mean(sample)\n",
    "s_mean"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 統計的仮説検定とは"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 統計的仮説検定の流れ"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:45.112973Z",
     "start_time": "2018-08-24T07:05:45.109309Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "128.681"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rv = stats.norm(130, np.sqrt(9/14))\n",
    "rv.isf(0.95)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:45.117198Z",
     "start_time": "2018-08-24T07:05:45.113995Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-1.932"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "z = (s_mean - 130) / np.sqrt(9/14)\n",
    "z"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:45.122064Z",
     "start_time": "2018-08-24T07:05:45.118103Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-1.645"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rv = stats.norm()\n",
    "rv.isf(0.95)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:45.126401Z",
     "start_time": "2018-08-24T07:05:45.123042Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.027"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rv.cdf(z)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 片側検定と両側検定"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:45.131360Z",
     "start_time": "2018-08-24T07:05:45.127654Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-1.932"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "z = (s_mean - 130) / np.sqrt(9/14)\n",
    "z"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:45.137501Z",
     "start_time": "2018-08-24T07:05:45.132802Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(-1.960, 1.960)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rv = stats.norm()\n",
    "rv.interval(0.95)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:45.141573Z",
     "start_time": "2018-08-24T07:05:45.138698Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.053"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rv.cdf(z) * 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 仮説検定における2つの過誤"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:45.145207Z",
     "start_time": "2018-08-24T07:05:45.142707Z"
    }
   },
   "outputs": [],
   "source": [
    "rv = stats.norm(130, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:45.610816Z",
     "start_time": "2018-08-24T07:05:45.146318Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.053"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c = stats.norm().isf(0.95)\n",
    "n_samples = 10000\n",
    "cnt = 0\n",
    "for _ in range(n_samples):\n",
    "    sample_ = np.round(rv.rvs(14), 2)\n",
    "    s_mean_ = np.mean(sample_)\n",
    "    z = (s_mean_ - 130) / np.sqrt(9/14)\n",
    "    if z < c:\n",
    "        cnt += 1\n",
    "cnt / n_samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:45.614545Z",
     "start_time": "2018-08-24T07:05:45.611974Z"
    }
   },
   "outputs": [],
   "source": [
    "rv = stats.norm(128, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.072562Z",
     "start_time": "2018-08-24T07:05:45.615736Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.197"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c = stats.norm().isf(0.95)\n",
    "n_samples = 10000\n",
    "cnt = 0\n",
    "for _ in range(n_samples):\n",
    "    sample_ = np.round(rv.rvs(14), 2)\n",
    "    s_mean_ = np.mean(sample_)\n",
    "    z = (s_mean_ - 130) / np.sqrt(9/14)\n",
    "    if z >= c:\n",
    "        cnt += 1\n",
    "        \n",
    "cnt / n_samples"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 仮説検定"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 正規分布の母平均の検定(母分散既知)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.084590Z",
     "start_time": "2018-08-24T07:05:46.073725Z"
    }
   },
   "outputs": [],
   "source": [
    "def pmean_test(sample, mean0, p_var, alpha=0.05):\n",
    "    s_mean = np.mean(sample)\n",
    "    n = len(sample)\n",
    "    rv = stats.norm()\n",
    "    interval = rv.interval(1-alpha)\n",
    "\n",
    "    z = (s_mean - mean0) / np.sqrt(p_var/n)\n",
    "    if interval[0] <= z <= interval[1]:\n",
    "        print('帰無仮説を採択')\n",
    "    else:\n",
    "        print('帰無仮説を棄却')\n",
    "\n",
    "    if z < 0:\n",
    "        p = rv.cdf(z) * 2\n",
    "    else:\n",
    "        p = (1 - rv.cdf(z)) * 2\n",
    "    print(f'p値は{p:.3f}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.089708Z",
     "start_time": "2018-08-24T07:05:46.085814Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "帰無仮説を採択\n",
      "p値は0.053\n"
     ]
    }
   ],
   "source": [
    "pmean_test(sample, 130, 9)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 正規分布の母分散の検定"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.102142Z",
     "start_time": "2018-08-24T07:05:46.090827Z"
    }
   },
   "outputs": [],
   "source": [
    "def pvar_test(sample, var0, alpha=0.05):\n",
    "    u_var = np.var(sample, ddof=1)\n",
    "    n = len(sample)\n",
    "    rv = stats.chi2(df=n-1)\n",
    "    interval = rv.interval(1-alpha)\n",
    "    \n",
    "    y = (n-1) * u_var / var0\n",
    "    if interval[0] <= y <= interval[1]:\n",
    "        print('帰無仮説を採択')\n",
    "    else:\n",
    "        print('帰無仮説を棄却')\n",
    "\n",
    "    if y < rv.isf(0.5):\n",
    "        p = rv.cdf(y) * 2\n",
    "    else:\n",
    "        p = (1 - rv.cdf(y)) * 2\n",
    "    print(f'p値は{p:.3f}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.108340Z",
     "start_time": "2018-08-24T07:05:46.103388Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "帰無仮説を採択\n",
      "p値は0.085\n"
     ]
    }
   ],
   "source": [
    "pvar_test(sample, 9)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-05-18T08:45:31.171707Z",
     "start_time": "2018-05-18T08:45:31.169401Z"
    }
   },
   "source": [
    "### 正規分布の母平均の検定(母分散未知)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.121314Z",
     "start_time": "2018-08-24T07:05:46.109509Z"
    }
   },
   "outputs": [],
   "source": [
    "def pmean_test(sample, mean0, alpha=0.05):\n",
    "    s_mean = np.mean(sample)\n",
    "    u_var = np.var(sample, ddof=1)\n",
    "    n = len(sample)\n",
    "    rv = stats.t(df=n-1)\n",
    "    interval = rv.interval(1-alpha)\n",
    "\n",
    "    t = (s_mean - mean0) / np.sqrt(u_var/n)\n",
    "    if interval[0] <= t <= interval[1]:\n",
    "        print('帰無仮説を採択')\n",
    "    else:\n",
    "        print('帰無仮説を棄却')\n",
    "\n",
    "    if t < 0:\n",
    "        p = rv.cdf(t) * 2\n",
    "    else:\n",
    "        p = (1 - rv.cdf(t)) * 2\n",
    "    print(f'p値は{p:.3f}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.126873Z",
     "start_time": "2018-08-24T07:05:46.122475Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "帰無仮説を採択\n",
      "p値は0.169\n"
     ]
    }
   ],
   "source": [
    "pmean_test(sample, 130)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.131732Z",
     "start_time": "2018-08-24T07:05:46.128023Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(-1.455, 0.169)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t, p = stats.ttest_1samp(sample, 130)\n",
    "t, p"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2標本問題に関する仮説検定"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 対応のあるt検定"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.142074Z",
     "start_time": "2018-08-24T07:05:46.132876Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(20, 2)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>前</th>\n",
       "      <th>後</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>59</td>\n",
       "      <td>41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>52</td>\n",
       "      <td>63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>55</td>\n",
       "      <td>68</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>61</td>\n",
       "      <td>59</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>59</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    前   後\n",
       "0  59  41\n",
       "1  52  63\n",
       "2  55  68\n",
       "3  61  59\n",
       "4  59  84"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "training_rel = pd.read_csv('../data/ch11_training_rel.csv')\n",
    "print(training_rel.shape)\n",
    "training_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.170624Z",
     "start_time": "2018-08-24T07:05:46.143168Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>前</th>\n",
       "      <th>後</th>\n",
       "      <th>差</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>59</td>\n",
       "      <td>41</td>\n",
       "      <td>-18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>52</td>\n",
       "      <td>63</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>55</td>\n",
       "      <td>68</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>61</td>\n",
       "      <td>59</td>\n",
       "      <td>-2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>59</td>\n",
       "      <td>84</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    前   後   差\n",
       "0  59  41 -18\n",
       "1  52  63  11\n",
       "2  55  68  13\n",
       "3  61  59  -2\n",
       "4  59  84  25"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "training_rel['差'] = training_rel['後'] - training_rel['前']\n",
    "training_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.175732Z",
     "start_time": "2018-08-24T07:05:46.171981Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.040"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t, p = stats.ttest_1samp(training_rel['差'], 0)\n",
    "p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.180719Z",
     "start_time": "2018-08-24T07:05:46.176793Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.040"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t, p = stats.ttest_rel(training_rel['後'], training_rel['前'])\n",
    "p"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 対応のないt検定"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.188822Z",
     "start_time": "2018-08-24T07:05:46.181763Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(20, 2)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>47</td>\n",
       "      <td>49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50</td>\n",
       "      <td>52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37</td>\n",
       "      <td>54</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>60</td>\n",
       "      <td>48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>39</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B\n",
       "0  47  49\n",
       "1  50  52\n",
       "2  37  54\n",
       "3  60  48\n",
       "4  39  51"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "training_ind = pd.read_csv('../data/ch11_training_ind.csv')\n",
    "print(training_ind.shape)\n",
    "training_ind.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.194002Z",
     "start_time": "2018-08-24T07:05:46.189948Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.087"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t, p = stats.ttest_ind(training_ind['A'], training_ind['B'],\n",
    "                       equal_var=False)\n",
    "p"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### ウィルコクソンの符号付き順位検定"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.201801Z",
     "start_time": "2018-08-24T07:05:46.195183Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>前</th>\n",
       "      <th>後</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>59</td>\n",
       "      <td>41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>52</td>\n",
       "      <td>63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>55</td>\n",
       "      <td>68</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>61</td>\n",
       "      <td>59</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>59</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>45</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    前   後\n",
       "0  59  41\n",
       "1  52  63\n",
       "2  55  68\n",
       "3  61  59\n",
       "4  59  84\n",
       "5  45  37"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "training_rel = pd.read_csv('../data/ch11_training_rel.csv')\n",
    "toy_df = training_rel[:6].copy()\n",
    "toy_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.209309Z",
     "start_time": "2018-08-24T07:05:46.203035Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>前</th>\n",
       "      <th>後</th>\n",
       "      <th>差</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>59</td>\n",
       "      <td>41</td>\n",
       "      <td>-18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>52</td>\n",
       "      <td>63</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>55</td>\n",
       "      <td>68</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>61</td>\n",
       "      <td>59</td>\n",
       "      <td>-2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>59</td>\n",
       "      <td>84</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>45</td>\n",
       "      <td>37</td>\n",
       "      <td>-8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    前   後   差\n",
       "0  59  41 -18\n",
       "1  52  63  11\n",
       "2  55  68  13\n",
       "3  61  59  -2\n",
       "4  59  84  25\n",
       "5  45  37  -8"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "diff = toy_df['後'] - toy_df['前']\n",
    "toy_df['差'] = diff\n",
    "toy_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.217605Z",
     "start_time": "2018-08-24T07:05:46.210468Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>前</th>\n",
       "      <th>後</th>\n",
       "      <th>差</th>\n",
       "      <th>順位</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>59</td>\n",
       "      <td>41</td>\n",
       "      <td>-18</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>52</td>\n",
       "      <td>63</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>55</td>\n",
       "      <td>68</td>\n",
       "      <td>13</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>61</td>\n",
       "      <td>59</td>\n",
       "      <td>-2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>59</td>\n",
       "      <td>84</td>\n",
       "      <td>25</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>45</td>\n",
       "      <td>37</td>\n",
       "      <td>-8</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    前   後   差  順位\n",
       "0  59  41 -18   5\n",
       "1  52  63  11   3\n",
       "2  55  68  13   4\n",
       "3  61  59  -2   1\n",
       "4  59  84  25   6\n",
       "5  45  37  -8   2"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rank = stats.rankdata(abs(diff)).astype(int)\n",
    "toy_df['順位'] = rank\n",
    "toy_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.223577Z",
     "start_time": "2018-08-24T07:05:46.218864Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(8, 13)"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r_minus = np.sum((diff < 0) * rank)\n",
    "r_plus = np.sum((diff > 0) * rank)\n",
    "\n",
    "r_minus, r_plus"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.234054Z",
     "start_time": "2018-08-24T07:05:46.224746Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>前</th>\n",
       "      <th>後</th>\n",
       "      <th>差</th>\n",
       "      <th>順位</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>59</td>\n",
       "      <td>60</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>52</td>\n",
       "      <td>54</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>55</td>\n",
       "      <td>58</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>61</td>\n",
       "      <td>65</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>59</td>\n",
       "      <td>64</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>45</td>\n",
       "      <td>51</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    前   後  差  順位\n",
       "0  59  60  1   1\n",
       "1  52  54  2   2\n",
       "2  55  58  3   3\n",
       "3  61  65  4   4\n",
       "4  59  64  5   5\n",
       "5  45  51  6   6"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "toy_df['後'] = toy_df['前'] + np.arange(1, 7)\n",
    "diff = toy_df['後'] - toy_df['前']\n",
    "rank = stats.rankdata(abs(diff)).astype(int)\n",
    "toy_df['差'] = diff\n",
    "toy_df['順位'] = rank\n",
    "toy_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.239887Z",
     "start_time": "2018-08-24T07:05:46.235134Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0, 21)"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r_minus = np.sum((diff < 0) * rank)\n",
    "r_plus = np.sum((diff > 0) * rank)\n",
    "\n",
    "r_minus, r_plus"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.251061Z",
     "start_time": "2018-08-24T07:05:46.242490Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>前</th>\n",
       "      <th>後</th>\n",
       "      <th>差</th>\n",
       "      <th>順位</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>59</td>\n",
       "      <td>60</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>52</td>\n",
       "      <td>50</td>\n",
       "      <td>-2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>55</td>\n",
       "      <td>52</td>\n",
       "      <td>-3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>61</td>\n",
       "      <td>65</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>59</td>\n",
       "      <td>64</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>45</td>\n",
       "      <td>39</td>\n",
       "      <td>-6</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    前   後  差  順位\n",
       "0  59  60  1   1\n",
       "1  52  50 -2   2\n",
       "2  55  52 -3   3\n",
       "3  61  65  4   4\n",
       "4  59  64  5   5\n",
       "5  45  39 -6   6"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "toy_df['後'] = toy_df['前'] + [1, -2, -3, 4, 5, -6]\n",
    "diff = toy_df['後'] - toy_df['前']\n",
    "rank = stats.rankdata(abs(diff)).astype(int)\n",
    "toy_df['差'] = diff\n",
    "toy_df['順位'] = rank\n",
    "toy_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.256477Z",
     "start_time": "2018-08-24T07:05:46.252136Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(11, 10)"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r_minus = np.sum((diff < 0) * rank)\n",
    "r_plus = np.sum((diff > 0) * rank)\n",
    "\n",
    "r_minus, r_plus"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.261588Z",
     "start_time": "2018-08-24T07:05:46.257715Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.038"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "T, p = stats.wilcoxon(training_rel['前'], training_rel['後'])\n",
    "p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.266378Z",
     "start_time": "2018-08-24T07:05:46.262703Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.038"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "T, p = stats.wilcoxon(training_rel['後'] - training_rel['前'])\n",
    "p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:46.280201Z",
     "start_time": "2018-08-24T07:05:46.267547Z"
    }
   },
   "outputs": [],
   "source": [
    "n = 10000\n",
    "diffs = np.round(stats.norm(3, 4).rvs(size=(n, 20)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:47.610383Z",
     "start_time": "2018-08-24T07:05:46.281743Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.883"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cnt = 0\n",
    "alpha = 0.05\n",
    "for diff in diffs:\n",
    "    t, p = stats.ttest_1samp(diff, 0)\n",
    "    if p < alpha:\n",
    "        cnt += 1\n",
    "cnt / n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.268059Z",
     "start_time": "2018-08-24T07:05:47.611721Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.874"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cnt = 0\n",
    "alpha = 0.05\n",
    "for diff in diffs:\n",
    "    T, p = stats.wilcoxon(diff)\n",
    "    if p < alpha:\n",
    "        cnt += 1\n",
    "cnt / n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### マン・ホイットニーのU検定"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.275994Z",
     "start_time": "2018-08-24T07:05:49.269119Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>47</td>\n",
       "      <td>49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50</td>\n",
       "      <td>52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37</td>\n",
       "      <td>54</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>60</td>\n",
       "      <td>48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>39</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B\n",
       "0  47  49\n",
       "1  50  52\n",
       "2  37  54\n",
       "3  60  48\n",
       "4  39  51"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "training_ind = pd.read_csv('../data/ch11_training_ind.csv')\n",
    "toy_df = training_ind[:5].copy()\n",
    "toy_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.285873Z",
     "start_time": "2018-08-24T07:05:49.277475Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A  B\n",
       "0   3  5\n",
       "1   6  8\n",
       "2   1  9\n",
       "3  10  4\n",
       "4   2  7"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rank = stats.rankdata(np.concatenate([toy_df['A'],\n",
    "                                      toy_df['B']]))\n",
    "rank_df = pd.DataFrame({'A': rank[:5],\n",
    "                        'B': rank[5:10]}).astype(int)\n",
    "rank_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.292820Z",
     "start_time": "2018-08-24T07:05:49.287606Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7.000"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n1 = len(rank_df['A'])\n",
    "u = rank_df['A'].sum() - (n1*(n1+1))/2\n",
    "u"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.301473Z",
     "start_time": "2018-08-24T07:05:49.294455Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A   B\n",
       "0  1   6\n",
       "1  2   7\n",
       "2  3   8\n",
       "3  4   9\n",
       "4  5  10"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rank_df = pd.DataFrame(np.arange(1, 11).reshape(2, 5).T,\n",
    "                       columns=['A', 'B'])\n",
    "rank_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.307579Z",
     "start_time": "2018-08-24T07:05:49.302805Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.000"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "u = rank_df['A'].sum() - (n1*(n1+1))/2\n",
    "u"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.316081Z",
     "start_time": "2018-08-24T07:05:49.309569Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A  B\n",
       "0   6  1\n",
       "1   7  2\n",
       "2   8  3\n",
       "3   9  4\n",
       "4  10  5"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rank_df = pd.DataFrame(np.arange(1, 11).reshape(2, 5)[::-1].T,\n",
    "                       columns=['A', 'B'])\n",
    "rank_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.321425Z",
     "start_time": "2018-08-24T07:05:49.317214Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "25.000"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "u = rank_df['A'].sum() - (n1*(n1+1))/2\n",
    "u"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.327402Z",
     "start_time": "2018-08-24T07:05:49.322854Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.059"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "u, p = stats.mannwhitneyu(training_ind['A'], training_ind['B'],\n",
    "                          alternative='two-sided')\n",
    "p"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### カイ二乗検定"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.336570Z",
     "start_time": "2018-08-24T07:05:49.328538Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>広告</th>\n",
       "      <th>購入</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B</td>\n",
       "      <td>しなかった</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>しなかった</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A</td>\n",
       "      <td>した</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A</td>\n",
       "      <td>した</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>B</td>\n",
       "      <td>しなかった</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  広告     購入\n",
       "0  B  しなかった\n",
       "1  B  しなかった\n",
       "2  A     した\n",
       "3  A     した\n",
       "4  B  しなかった"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ad_df = pd.read_csv('../data/ch11_ad.csv')\n",
    "n = len(ad_df)\n",
    "print(n)\n",
    "ad_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.354107Z",
     "start_time": "2018-08-24T07:05:49.337684Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>購入</th>\n",
       "      <th>した</th>\n",
       "      <th>しなかった</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>広告</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>49</td>\n",
       "      <td>351</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>51</td>\n",
       "      <td>549</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "購入  した  しなかった\n",
       "広告           \n",
       "A   49    351\n",
       "B   51    549"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ad_cross = pd.crosstab(ad_df['広告'], ad_df['購入'])\n",
    "ad_cross"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.359743Z",
     "start_time": "2018-08-24T07:05:49.355421Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "広告\n",
       "A    0.1225\n",
       "B    0.0850\n",
       "dtype: float64"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ad_cross['した'] / (ad_cross['した'] + ad_cross['しなかった'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.364603Z",
     "start_time": "2018-08-24T07:05:49.361124Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(100, 900)"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n_yes, n_not = ad_cross.sum()\n",
    "n_yes, n_not"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.370216Z",
     "start_time": "2018-08-24T07:05:49.365857Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(400, 600)"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n_adA, n_adB = ad_cross.sum(axis=1)\n",
    "n_adA, n_adB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.379853Z",
     "start_time": "2018-08-24T07:05:49.371681Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>した</th>\n",
       "      <th>しなかった</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>40.0</td>\n",
       "      <td>360.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>60.0</td>\n",
       "      <td>540.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     した  しなかった\n",
       "A  40.0  360.0\n",
       "B  60.0  540.0"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ad_ef = pd.DataFrame({'した': [n_adA * n_yes / n,\n",
    "                              n_adB * n_yes / n],\n",
    "                      'しなかった': [n_adA * n_not / n,\n",
    "                                   n_adB * n_not / n]},\n",
    "                      index=['A', 'B'])\n",
    "ad_ef"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.385412Z",
     "start_time": "2018-08-24T07:05:49.380905Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3.750"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = ((ad_cross - ad_ef) ** 2 / ad_ef).sum().sum()\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.390070Z",
     "start_time": "2018-08-24T07:05:49.386617Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.053"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rv = stats.chi2(1)\n",
    "1 - rv.cdf(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.395030Z",
     "start_time": "2018-08-24T07:05:49.391241Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3.750, 0.053, 1)"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chi2, p, dof, ef = stats.chi2_contingency(ad_cross,\n",
    "                                          correction=False)\n",
    "chi2, p, dof"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-24T07:05:49.399218Z",
     "start_time": "2018-08-24T07:05:49.396171Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 40., 360.],\n",
       "       [ 60., 540.]])"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ef"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  },
  "toc": {
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": "block",
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
